package com.terren.spider.core.html.scheme.impl;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.terren.spider.entity.core.Entry;
import com.terren.spider.util.common.StringUtil;

import us.codecraft.webmagic.Page;

/**
 * Custom crawl scheme for the 58che search pages.
 *
 * <p>Supplies the pagination ("help") URLs found on a search-result page and
 * the fixed cookie set sent with requests.
 */
public class Che58Scheme extends BasicCustomScheme{

	/** Charset handed to {@code StringUtil.encodeZH} when encoding pagination URLs. */
	private static final String CHARSET = "gb2312";

	/** Prefix prepended to the "next page" href before it is encoded. */
	private static final String BASE_URL = "http://search.58che.com";

	/**
	 * Collects pagination URLs from a search-result page, e.g.
	 * http://search.58che.com/doc.php?keyword=%C6%FB%B3%B5&order=&page=2
	 *
	 * <p>If the page exposes a "next" link, only that link (prefixed with the
	 * site base URL) is returned. If it exposes only a "prev" link, nothing is
	 * returned. If it exposes neither, every link inside the
	 * {@code all_page} container is returned instead.
	 *
	 * @param page  the downloaded page to scan for pagination links
	 * @param entry the crawl entry; not used by this implementation
	 * @return the encoded pagination URLs, possibly empty, never {@code null}
	 */
	@Override
	public Set<String> findHelpPaginationUrl(Page page, Entry entry) {
		Set<String> resultSet = new HashSet<>();

		String nextHref = page.getHtml().$(".next","href").toString();
		String prevHref = page.getHtml().$(".prev","href").toString();

		// Only build and encode the "next page" URL once the href is known to be
		// usable, so a missing link never yields a bogus "...null" URL.
		if (hasText(nextHref)) {
			resultSet.add(StringUtil.encodeZH(BASE_URL + nextHref, CHARSET));
		}

		// Neither a previous-page nor a next-page link: take every link in the pager.
		if (!hasText(prevHref) && !hasText(nextHref)) {
			List<String> allPageLinks = page.getHtml().xpath("//div[@id='all_page']").links().all();
			if (allPageLinks != null) {
				for (String url : allPageLinks) {
					resultSet.add(StringUtil.encodeZH(url, CHARSET));
				}
			}
		}
		return resultSet;
	}

	/**
	 * Returns the fixed set of cookies used for this site.
	 *
	 * @param entry the crawl entry; not used by this implementation
	 * @return a new mutable map of cookie name to cookie value
	 */
	@Override
	public Map<String, String> getCookie(Entry entry) {
		Map<String, String> cookies = new HashMap<>();
		cookies.put("58che_mainpage_ab_test","a");
		cookies.put("58tj_uuid","1cb0bef3-a2f7-4d7c-b070-4c9e00665625");
		cookies.put("Hm_lpvt_25845b529d5d7e763490238117473eb6","1530173530");
		cookies.put("Hm_lvt_25845b529d5d7e763490238117473eb6","1530151230,1530156500,1530156588,1530165520");
		// NOTE(review): only the Hm_lpvt_ variant exists for this id; a matching
		// Hm_lvt_ entry may have been intended — confirm against a live session.
		cookies.put("Hm_lpvt_f31b3bde5ef6233a36928514fb59f9cd","1528698952");
		cookies.put("wmda_uuid","2b21cd5f0fbd0957bff6e6070ac152f5");
		return cookies;
	}

	/** Returns {@code true} when the value is neither {@code null} nor the empty string. */
	private static boolean hasText(String value) {
		return value != null && !value.isEmpty();
	}
}
